conflicts_prefer(dplyr::filter)

cand_info_all <- read_csv("./data_clean/candidate_info.csv",
                          col_select = -1) 
matched_info <- read_csv("./data_clean/RQDA_KED_matched.csv")




file_attr_petition_clean <- 
  mutate(file_attributes, 
         unit_code_county = as.integer(ifelse(position != "MP",unit_code, NA)),
         unit_code_constituency = as.integer(ifelse(position == "MP", unit_code, NA))) |>
  dplyr::filter(file_type == "petition") |>
  dplyr::select(-c(file_appellant,
            file_appeal, 
            overturned,
            file_type))

ked_clean <- 
  mutate(cand_info_all,
         ElecType = dplyr::case_when(ElecType == "PARLIAMENTARY" ~ "MP",
                                         ElecType == "GUBERNATORIAL" ~ "governor",
                                         ElecType == "SENATORIAL" ~ "senator",
                                         ElecType == "WOMEN REP" ~ "women_rep"),
         CountyCode_only = dplyr::case_when(ElecType != "MP" ~ CountyCode)
         ) |>
  dplyr::distinct()



########################
#governor percentages
percent_by_race_gov <- ked_clean |> 
  filter(ElecType == "governor", ElecYear == 2013) |> 
  rename(ValidVotesPCT = `ValidVotes%`) |>
  group_by(CountyName, CountyCode) |> 
  spread(Position , ValidVotesPCT, sep = "_") |> 
  summarize_at(vars(starts_with("Position")), sum, na.rm =TRUE) |>
  mutate(margin1_2 = Position_1 - Position_2,
         race = "Governor") |>
  ungroup()


#women rep percentages
percent_by_race_wr <- ked_clean |> 
  filter(ElecType == "women_rep", ElecYear == 2013) |> 
  rename(ValidVotesPCT = `ValidVotes%`) |>
  group_by(CountyName, CountyCode) |> 
  spread(Position , ValidVotesPCT, sep = "_") |> 
  summarize_at(vars(starts_with("Position")), sum, na.rm =TRUE) |>
  ungroup()

#manual enter TRANS NZOIA from gazette
TNTOTAL <- 193361
percent_by_race_wr$Position_1[percent_by_race_wr$CountyName == "TRANS NZOIA"] <- 36384/TNTOTAL 
percent_by_race_wr$Position_2[percent_by_race_wr$CountyName == "TRANS NZOIA"] <- 35326 /TNTOTAL 

percent_by_race_wr <- percent_by_race_wr  |> 
  mutate(margin1_2 = Position_1 -Position_2,
         race = "Women's Rep")

#senator percentages
percent_by_race_sen <- ked_clean |> 
  filter(ElecType == "senator", ElecYear == 2013) |> 
  rename(ValidVotesPCT = `ValidVotes%`) |>
  group_by(CountyName, CountyCode) |> 
  spread(Position , ValidVotesPCT, sep = "_") |> 
  summarize_at(vars(starts_with("Position")), sum, na.rm =TRUE) |>
  mutate(margin1_2 = Position_1 -Position_2, 
         race = "Senate") 

#mp percentages
percent_by_race_mp <- ked_clean |> 
  filter(ElecType == "MP", ElecYear == 2013) |> 
  rename(ValidVotesPCT = `ValidVotes%`, ConstCode = `ConstCode-2012`) |>
  group_by(ConstCode) |> 
  spread(Position , ValidVotesPCT, sep = "_") |> 
  summarize_at(vars(starts_with("Position")), sum, na.rm =TRUE) |>
  mutate(margin1_2 = Position_1 -Position_2,
         race = "MP")

##### Now gets case info
govs <- file_attr_petition_clean |> 
  filter(position == "governor") |>
  mutate(county_code = as.numeric(unit_code), has_case = 1)

#gets all races with a petition and removes Baringo for which no record
mps <- file_attr_petition_clean |>
  filter(position == "MP") |>
  mutate(const_code = as.numeric(unit_code), has_case = 1) |>
  mutate(has_case = if_else(unit_name =="baringo_south", NA, has_case ),
         petitioner_candidate = ifelse(unit_name == "wajir_west", "no", petitioner_candidate))

#get's all races with a petition including candidate races
wrs <- file_attr_petition_clean |> 
  filter(position == "women_rep") |>
  mutate(county_code = as.numeric(unit_code),
         has_case = 1)

#issue with Kilifi
sens <- file_attr_petition_clean |> filter(position == "senator") |>
  mutate(county_code = as.numeric(unit_code),
         has_case = 1)

percent_by_race_gov <-
  left_join(percent_by_race_gov, 
            govs,
            by = c("CountyCode" = "county_code"))


#get rid of a an early struck out non-recorded case in Lamu
percent_by_race_wr2 <- left_join(percent_by_race_wr, wrs, by = c("CountyCode" = "county_code")) |>
  # filter(!(CountyName == "LAMU" & petitioner_last_name == "Bwana")) |>
  mutate(has_case = ifelse(CountyName == "LAMU", NA, has_case))

percent_by_race_sen <- 
  left_join(percent_by_race_sen, 
            sens,
            by = c("CountyCode" = "county_code")
            )

stopifnot(nrow(percent_by_race_sen) == 47)

percent_by_race_mp <- 
  left_join(percent_by_race_mp,
            mps,
            by = c("ConstCode" = "const_code")
            )

all_district <- 
  bind_rows(percent_by_race_gov,
            percent_by_race_sen,
            percent_by_race_wr2,
            percent_by_race_mp) |>
  mutate(`Race Type` = ifelse(race == "MP", "MP", "County"))


#take into consideration two struck out cases. 
#there are no reported rsults from trans nzoia in see if you can find
all_district <- all_district |> 
  mutate(group_win = cut(margin1_2, breaks = c(0, 10, 30, 50, 100), right = FALSE, include.lowest = TRUE)) |>
  mutate(has_case2 = ifelse(!is.na(has_case) & petitioner_candidate == "yes", 1, 0)) |>
  filter(!(petitioner_candidate == "no" & CountyName %in% c("KILIFI") & race == "Governor")) |> #remove two double counted citizen petitions 
  filter(!(petitioner_candidate == "no" & CountyName %in% c("LAMU") & race == "Women's Rep"))

percent_by_race_mp <- percent_by_race_mp |> 
  mutate(group_win = cut(margin1_2, breaks = c(0, 10, 30, 50, 100)), right = FALSE) |>
  mutate(has_case2 = ifelse(petitioner_candidate == "yes", 1, 0))


# this is the distribution
# vernor        Mp   Senator Women Rep 
# 13        46         7         5
# Fix to deal with 73 vs 71 cases

#if a case was fleetingly submitted and then withdrawn or struck out then we don't count it. 
sum(all_district$has_case, na.rm =TRUE)

all_district |> filter(has_case == 1) |>
  group_by(petitioner_candidate) |> tally()

all_district |> group_by(group_win) |> 
  summarize(#mean = mean(has_case2, na.rm = TRUE),
            num_cases = sum(has_case2, na.rm = TRUE),
            n= n()
            ) |>
  mutate(mean = num_cases/ n)


# get to work to show by subgroup
all_district |>  mutate(race_group = paste(race, group_win)) |>
  group_by(race_group) |> 
  dplyr::summarize(#mean = mean(has_case2, na.rm = TRUE),
    num_cases = sum(has_case2, na.rm = TRUE),
    n= n()) |>
  mutate(mean = num_cases/ n)

out_count <- all_district |> 
  mutate(race_group = paste(race, group_win)) |>
  group_by(`Race Type`, group_win) |> 
  dplyr::summarize(#mean = mean(has_case2, na.rm = TRUE),
    num_cases = sum(has_case2, na.rm = TRUE),
    n= n()) |>
  mutate(mean = num_cases/ n)

#the last issue is that there are two races that have two petitions for the same race
out_count2 <- out_count |> 
  mutate(`mean` = scales::percent(mean)) |>
  group_by(`Race Type`) |>
  rename(`Victory Margin` = group_win,
         `Petitions` = num_cases,
         `Total Races` = n,
         `Pct. Petitions` = mean) |>
  group_map( ~ .x |> adorn_totals("row") )


out_count2 <- bind_rows(out_count2)
out_count2 <- add_column(out_count2, 
                         "Level" = c(rep("County", 5), rep("MP", 5)),
                         .before = TRUE)


std_border <- fp_border(color="black")

#OUTPUT MARGIN OF VICTORY TABLE (TABLE 2)
flex_out_count <- flextable(out_count2)
flex_out_count <- merge_at(flex_out_count, j = 1,  i = c(1:5))
flex_out_count <- merge_at(flex_out_count, j = 1,  i = c(6:10))

flex_out_count <- hline(flex_out_count, i = c(4,5, 9), border = std_border )
doc <- read_docx()
doc <- body_add_flextable(doc, value = flex_out_count)
fileout <- tempfile(fileext = ".docx")
# fileout <- "test.docx" # uncomment to write in your working directory
print(doc, target = "tables/table1_marginsofvic.docx")


#show the two cases there there is not a consolidated file   
all_district |> 
  get_dupes(CountyName, race) |> 
  filter(!is.na(CountyName)) |> 
  select(CountyName, petitioner_candidate)


